library(tidyverse)
library(stargazer)
library(tidytext)
library(matrixStats)
library(haven)
library(urltools)


## ggplot settings: make the classic theme the default for every plot
## drawn after this point.
ggplot2::theme_set(ggplot2::theme_classic())

## Reduce each URL in `x` to the `domain` field that suffix_extract()
## reports for the host name url_parse() pulls out of that URL.
## Returns one value per element of `x`.
just_domain <- function(x) {
  hosts <- url_parse(x)$domain
  suffix_extract(hosts)$domain
}


## Domain list with its hand-edited `type` labels: keep only the two
## columns used here, normalise every domain with just_domain(), and
## relabel the "aggregator" type as "portal".
alexa355 <- read.csv("../data/politicaldomains-final-edits.csv") %>%
  select(domain, type) %>%
  mutate(
    domain = just_domain(domain),
    ## read.csv may hand back a factor; compare and recode as plain text.
    type = as.character(type),
    type = ifelse(type == "aggregator", "portal", type))

## Visit-level records: expose the `female` column under the name
## `gender` and keep only rows whose `party` is "dem" or "rep"
## (rows with a missing `party` are dropped as well).
news <- read_csv("../results/merged_issues.csv") %>%
  rename(gender = female) %>%
  filter(party %in% c("dem", "rep"))





### BEGIN Investigate Partisan Strength

## Bucket every row by how far its `pid` code sits from the midpoint
## value 4; any `pid` not listed below (including NA) yields NA.
news <- news %>%
  mutate(strength = case_when(
    pid %in% c(1, 7) ~ "strong",
    pid %in% c(2, 6) ~ "weak",
    pid %in% c(3, 5) ~ "leaning",
    pid %in% 4 ~ "true_ind"))

## One row per respondent (with party and strength): the number of
## rows in `news` that carry a non-missing `bakshy_domain`.
subs <- news %>%
  filter(!is.na(bakshy_domain)) %>%
  group_by(caseid, party, strength) %>%
  summarise(total_visits = n()) %>%
  ungroup()

## Respondent-by-domain grid. Pairs that never occur in `news` are
## added with zero visits; respondents absent from `subs` (no
## total_visits after the join) are removed.
ldf <- news %>%
  group_by(bakshy_domain, caseid) %>%
  summarise(visits = n()) %>%
  ungroup() %>%
  complete(bakshy_domain, caseid, fill = list(visits = 0)) %>%
  left_join(subs, by = "caseid") %>%
  filter(!is.na(total_visits))

## Attach each domain's `b_align` value, build the 0/1 indicators and
## outcome variables used by the models, drop the rows that belong to
## the missing-domain group, and finally attach respondent weights.
ldf <- distinct(news, bakshy_domain, b_align) %>%
  right_join(ldf, by = "bakshy_domain") %>%
  mutate(
    rep = as.numeric(party == "rep"),
    dem = as.numeric(party == "dem"),
    strong = as.numeric(strength == "strong"),
    weak = as.numeric(strength == "weak"),
    leaner = as.numeric(strength == "leaning"),
    ## share of the respondent's counted visits going to this domain
    pct_diet = 100.0 * visits / total_visits,
    ## 1 when the respondent has at least one visit to this domain
    visit_once = as.numeric(visits > 0)) %>%
  filter(!is.na(bakshy_domain)) %>%
  left_join(distinct(news, caseid, weight), by = "caseid")

library(lmtest)
library(sandwich)
library(lfe)



## Baseline two-way fixed-effects model of `visit_once` on the
## interaction of source alignment (`b_align`) with the Republican
## indicator (`rep`). The felm() formula parts are, in order:
## covariates | fixed effects (caseid and bakshy_domain) |
## instruments (0 = none) | clustering variable (caseid).
## Rows are weighted by `ldf$weight`.
mod5 <- felm(
  visit_once ~ b_align:rep | caseid + bakshy_domain | 0 | caseid,
  data = ldf,
  weights = ldf$weight)

## Same specification as mod5 plus triple interactions with the
## `leaner` and `weak` indicators; rows where both indicators are 0
## form the reference group. The order of the terms determines the
## order of the coefficient rows in the regression table.
mod6 <- felm(
  visit_once ~ b_align:rep + b_align:rep:leaner +
  b_align:rep:weak| caseid + bakshy_domain | 0 | caseid,
  data = ldf,
  weights = ldf$weight)

## Distinct respondents and distinct sources among the rows of `ldf`
## that have a non-missing alignment score.
num_ind <- ldf %>%
  filter(!is.na(b_align)) %>%
  pull(caseid) %>%
  n_distinct()
num_src <- ldf %>%
  filter(!is.na(b_align)) %>%
  pull(bakshy_domain) %>%
  n_distinct()

## Weighted mean of the dependent variable. The numerator and the
## denominator are computed over the same rows (those where both
## `visit_once` and `weight` are present). Summing all weights in the
## denominator while dropping NA products from the numerator would
## return NA as soon as one weight is missing, and would otherwise
## bias the mean downwards.
## NOTE(review): unlike num_ind / num_src, this mean is not restricted
## to rows with a non-missing `b_align` -- confirm that is intended.
dv_mean <- local({
  ok <- !is.na(ldf$visit_once) & !is.na(ldf$weight)
  sum(ldf$visit_once[ok] * ldf$weight[ok]) / sum(ldf$weight[ok])
})

## Write the LaTeX regression table for mod5 and mod6 to the `out`
## path. Below the coefficient rows the table reports the weighted
## outcome mean, the respondent and source counts, and check marks for
## the fixed effects and weighting; each value is repeated once per
## model column.
stargazer(
  mod5, mod6,
  type = "latex",
  style = "ajps",
  digits = 3,
  keep.stat = "n",
  title = "Two-way fixed effects: party identification and source alignment (issue-based keywords).",
  label = "t:issue_party_align_twoway_fe",
  dep.var.labels = "Visit Domain",
  ## Labels follow the order of the terms in the mod6 formula.
  covariate.labels = c(
    "REP * Alignment",
    "REP * Alignment * Leaner",
    "REP * Alignment * Weak"),
  add.lines = list(
    c("Weighted Mean", rep(round(dv_mean, 3), 2)),
    c("Individuals", rep(num_ind, 2)),
    c("Sources", rep(num_src, 2)),
    c("Individual FEs", rep("\\checkmark", 2)),
    c("Source FEs", rep("\\checkmark", 2)),
    c("Weighted", rep("\\checkmark", 2))),
  notes = "Standard errors clustered by individual. Regressions computed using \\texttt{lfe} package \\citep{lfe}.",
  out = "../results/tables/party_align_twoway_fe_issues.tex")
